/*
    Purpose: Using the 1920-1980 Censuses,
             this file first creates a template
             with every occupation x race x south
             x year cell. All years from 1920 to 
             1989 are included in the template.
             Median family size *by decade* 
             and by occ x race x south (as 
             calculated in 1_) are then merged
             into the template. A weighted average of 
             median family size is then created using the 
             two Censuses closest to the cell year.
             (i.e., if the cell year is 1921, then 
             a weighted average of 1920 median family
             size and 1930 median family size is taken.) 

    Note: The interpolated median family size is
          calculated for 1920-1989 because this info
          will be merged to Jácome et al. survey 
          respondents, who turned 10 between 1920 and 1989.

    Creates: MedianFamilySize_byage10.dta
*/

clear
set more off
cd "$Mydirectory1/1_DataSources/CensusData/"

/* Make template with every occupation x race x south x year cell,
   where the year ranges from 1920 to 1989.

   Variable names are written out in full throughout (no abbreviations),
   so the section also runs under -set varabbrev off- and cannot become
   ambiguous if similarly named variables are added later. */
	use ./output/NumberChildren_byCensus.dta, clear
	drop if year_census==1910 | year_census==1990
	
	keep year_census occ1950ej

	* One row per Census year x occupation
	bysort year_census occ1950ej: keep if _n==1
	
	* Ten rows per Census year x occupation: offsets 0-9 from the Census year
	expand 10
	bysort year_census occ1950ej: gen number1 = _n-1
	gen age10 = year_census + number1
	
	* Duplicate every cell so that both race codes (1 and 2) are present
	gen race=1
	expand 2 
	bysort age10 occ1950ej: replace race=2 if _n==1
	tab race
	
	* Duplicate again so that both south_merge codes (0 and 1) are present
	gen south_merge=0
	expand 2 
	bysort age10 occ1950ej race: replace south_merge=1 if _n==2
	
	* decade1/decade2 are the two Censuses bracketing age10;
	* number1 = years since decade1, number2 = years until decade2
	rename year_census decade1
	gen decade2 = decade1+10
	gen number2 = 10-number1
	
	order decade1 decade2 number1 number2, after(south_merge)
	
	tempfile template
	save `template'
	
* Load median family size by Census decade and store it twice:
* once keyed as the earlier bracketing Census (decade1) and once
* keyed as the later bracketing Census (decade2)
	use ./output/NumberChildren_byCensus.dta, clear

	local family "number_member_family"
	keep occ1950ej race south_merge year_census `family' 

	// k=1: first of the two closest Censuses  (1920-1980)
	// k=2: second of the two closest Censuses (1930-1990)
	forvalues k = 1/2 {
		local first_census = 1910 + 10*`k'
		local last_census  = 1970 + 10*`k'

		preserve
			keep if inrange(year_census, `first_census', `last_census')

			rename year_census decade`k'
			rename `family' `family'`k'

			tempfile decade`k'
			save `decade`k''
		restore
	}
	
* Now merge both tempfiles into the template with every occ x race x south x year cell.
* keep(master match) keeps template cells only: Census rows that have no
* counterpart in the template would otherwise be appended with a missing
* age10 and end up in the saved file.
	use `template', clear
	
	merge m:1 decade1 occ1950ej race south_merge using `decade1', keep(master match) nogenerate
	
	merge m:1 decade2 occ1950ej race south_merge using `decade2', keep(master match) nogenerate

* Find the weighted average of the two Censuses
* (number2/10 is the weight on the earlier Census, number1/10 on the later one)
	gen `family' = ((number2/10)*`family'1) + ((number1/10)*`family'2)

	* In an exact Census year (number1==0) the later Census has zero weight,
	* but 0*missing evaluates to missing. Fall back to the earlier Census
	* value so a missing later-Census cell does not wipe out a known value.
	replace `family' = `family'1 if number1==0 & missing(`family')

	drop `family'1 `family'2 decade* number1 number2
	

* Save 
	rename occ1950ej fatheroccej
	label var `family' "Median fam. size in occXraceXsouth for given year (interpolated)"
	
	compress 
	save ./output/MedianFamilySize_byage10.dta, replace
